
# This file contains R script that replicates the results reported in
# Hana M. Broulíková, Peter Huber, Josef Montag, and Petr Sunega. 2018.
# "Homeownership, Mobility, and Unemployment: Evidence from Housing
# Privatization."

# Author: Josef Montag | josef.montag@gmail.com
# ISE KBTU

# January 2, 2018 

# The code replicates only results pertaining to LiTS data, that is Tables 2
# through 6 and the Appendix Tables A1 through A3. (For replication of the
# results pertaining to GSOEP data (Tables # 7 through 10), see the code in file
# "3_Replication_Code_GSOEP.R.")

# Note that the LiTS 2010 data is not part of this replication package. The code
# will download it from the EBRD website http://www.ebrd.com/downloads/research
# /surveys/lits2.dta (last accessed January 2, 2018).

# You need to have installed the libraries required by the code in the section
# "Preliminaries" below.

# The script was tested to work in R version 3.4.2 (2017-09-28), R Core Team.
# 2017. "R: A language and environment for statistical computing." R Foundation
# for Statistical Computing, Vienna, Austria. URL https://www.R-project.org/.

# This file is sectioned using Vim wrapping delimiters: 
# "{{{1" denotes a top-level heading
# "{{{2" denotes a second-level heading

# Preliminaries {{{1

options(width = 350, digits = 3)

library(multiwayvcov)
library(readstata13)
library(dplyr)
library(xtable)
library(stargazer)

# Load LiTS 2010 data {{{1

# Check whether LiTS 2010 available. If not, download it:
if(!any(list.files() == 'lits2.dta')) {
     download.file('http://www.ebrd.com/downloads/research/surveys/lits2.dta',
                   destfile = 'lits2.dta')
}

# Load LiTS 2010 data:
lits <- read.dta13('lits2.dta', nonint.factors = TRUE)

# Data recoding {{{1

# Create and assign meaningful variable names {{{2

nams <- c(Size_of_   = 'hhsize',
          Males_in   = 'nmales',
          Adults_i   = 'nadults',
          Children   = 'nchildren',
          q201       = 'dwell.type',
          q202       = 'homeown',
          q204       = 'obtained',
          q205       = 'mortgage.now',
          q501       = 'worked12',
          q515       = 'educ',
          q522       = 'jobsearch',
          q528       = 'move.within',
          q529       = 'move.abroad',
          q537       = 'take.risk',
          q701       = 'married',
          q703       = 'flang',
          q705       = 'yearres',
          q714a      = 'comm.resp',
          q714b      = 'comm.father',
          q714c      = 'comm.mother',
          XCweight   = 'weight.federalist'
          )

# Create variable names for characteristics of individuals in HH:
seqnew <- c('sex.p', 'reltohead.p', 'age.p')
seqold <- c('q102_', 'q103_', 'q104a_')
e <- 12
seqnew <- paste0(rep(seqnew, each = e), 1:12)
names(seqnew) <- paste0(rep(seqold, each = e), 1:12)

nams <- c(nams, seqnew)
rm(e, seqnew, seqold)

# Rename variables:
lits <- plyr::rename(lits, nams)

# Data recoding and creating useful variables {{{2

# Create variables with respondent's characteristics:
lits <- cbind(lits, t(
    sapply(1:nrow(lits), function(r) {
           sel <- lits$select[r]
           as.numeric(lits[r, paste0(c('sex.p', 'age.p', 'reltohead.p'), sel)])
                          })))
names(lits)[(ncol(lits) - 2):ncol(lits)] <- 
  c('sex.r', 'age.r', 'reltohead.r')

# Encode PSU variable:
Encoding(lits$Region1) <- 'latin1'

# Recode variable values:
lits <- within(lits, {
  own.type    <- factor(as.numeric(obtained), c(4:10),
                                  c('Rented', 'NA', 'Privatized', 'Buy.mort',
                                    'Buy.no.mort', 'Coop', 'Inher'))
  renter      <- own.type == 'Rented'
  howner      <- own.type %in% c('Privatized', 'Buy.mort', 'Buy.no.mort',
                                 'Coop', 'Inher')
  privatized   <- own.type == 'Privatized'
  howner.nonpriv <- howner & !privatized
  inherited    <- own.type == 'Inher'
  coop         <- own.type == 'Coop'
  mortgage.now <- mortgage.now == 'Yes'
  move.within  <- move.within == 'Yes'
  move.abroad  <- move.abroad == 'Yes'
  move.willing <- move.within | move.abroad
  worked12     <- worked12   == 'Yes'
  country      <- countryname
  country      <- ifelse(country == 'Czech', 'Czech Republic', country)
  country      <- ifelse(country == 'UK', 'Great Britain', country)
  country      <- ifelse(country == 'BiH', 'Bosnia and Herzegovina', country)
  country      <- ifelse(country == 'FYROM', 'Macedonia', country)
  country      <- ifelse(country == 'Germany',
                         ifelse(trimws(Region1) %in% c('Brandenburg',
                                'Mecklenburg-Vorpommern', 'Saxony',
                                'Saxony-Anhalt', 'Thuringia'), 
                                'East Germany',
                                'West Germany'),
                         country)
  co.grp       <- factor(ifelse(country %in% c('France',
                                               'Great Britain',
                                               'Italy',
                                               'Sweden',
                                               'West Germany'),
                               'WEurope',
                         ifelse(country %in% c('Bulgaria',
                                               'Czech Republic',
                                               'East Germany',
                                               'Estonia',
                                               'Latvia',
                                               'Lithuania',
                                               'Hungary',
                                               'Poland',
                                               'Romania',
                                               'Slovakia',
                                               'Slovenia'), 
                                'CEE',
                         ifelse(country %in% c('Albania',
                                               'Bosnia and Herzegovina',
                                               'Croatia',
                                               'Macedonia',
                                               'Serbia',
                                               'Kosovo',
                                               'Montenegro'),
                                'Balkans',
                         ifelse(country %in% c('Armenia',
                                               'Azerbaijan',
                                               'Belarus',
                                               'Estonia',
                                               'Georgia',
                                               'Kazakhstan',
                                               'Kyrgyzstan',
                                               'Latvia',
                                               'Lithuania',
                                               'Moldova',
                                               'Mongolia',
                                               'Russia',
                                               'Tajikistan',
                                               'Ukraine',
                                               'Uzbekistan'), 
                                'FSU',
                                NA)))),
                        levels = c('WEurope', 'CEE', 'Balkans', 'FSU'))
  Baltic       <- country %in% c('Estonia', 'Latvia', 'Lithuania')
  sex.r        <- sex.r == 2 * 1
  sex.p1       <- sex.p1 == 'Female'
  child        <- nchildren > 0
  educ         <- plyr::mapvalues(educ, c(-1,  1:7),
                                        c(0, 8, 11, 12, 13, 14, 15, 17))
  married      <- married == 'MARRIED'
  empl         <- q503_1 == 1 | q503_2 == 1 | q503_3 == 1 | q503_4 == 1 | 
                  q503_5 == 1 
  inact        <- !empl & !jobsearch %in% 1:2 
  unempl       <- !empl & jobsearch %in% 1:2
  ltunemp      <- unempl & !worked12
  flang        <- flang != 'Yes'
  yearres      <- ifelse(yearres == 98, age.r, yearres)
  jobsearch    <- jobsearch %in% 1#:2
  onjobsearch  <- ifelse(empl, jobsearch, NA)
  comm.resp    <- comm.resp == 'Respondent'
  comm.father  <- comm.father == 'Father'
  comm.mother  <- comm.mother == 'Mother'
  comm.parents <- comm.father | comm.mother
  m.eu         <- ifelse(empl, 0, ifelse(unempl & worked12, 1, NA))
  m.eu2        <- ifelse(empl, 0, ifelse(inact & worked12, 1, NA))
  m.eu3        <- m.eu | m.eu2
  m.eu4        <- ifelse(empl, 0, ifelse(!empl & !ltunemp, 1, NA))
  njobs        <- rowSums(lits[grep('503_', names(lits))] == 1)
  hhsize       <- as.numeric(hhsize)
  Region1      <- ifelse(country == 'Sweden', psu, Region1)
          })

# Subset data to produce the analysis sample and compute weights {{{1

# Subset data vertically:
nams <- c('SerialID', 'country', 'co.grp', 'Baltic', 'Region1', 'age.r',
          'sex.r', 'renter', 'howner', 'own.type', 'privatized',
          'howner.nonpriv', 'inherited', 'coop', 'move.willing', 'empl',
          'unempl', 'ltunemp', 'inact', 'flang', 'yearres', 'onjobsearch',
          'child', 'comm.parents', nams)
lits <- lits[nams]

# Clean data from NAs:
lits[lits == -1 | lits == -99 | lits == -98 | lits == -97] <- NA

lits <- 
  subset(lits, age.r %in% 18:65 & 
         !country %in% c('Turkey', 'Mongolia', 'France', 'Bulgaria') &
         own.type != 'NA' &
         complete.cases(lits[c('age.r', 'yearres', 'take.risk')])
                     )
lits <- droplevels(lits)

# Produce federalist weights for analysis dataset:
wgts.priv <- with(subset(lits, privatized | renter), table(co.grp, country))
wgts.priv <- 1 / colSums(wgts.priv / rowSums(wgts.priv))
wgts.priv <- data.frame(wgts.priv)
wgts.priv$country <- rownames(wgts.priv)
lits <- merge(lits, wgts.priv)
rm(wgts.priv)
lits$wgts.priv.federalist <- with(lits, wgts.priv * weight.federalist)

main.vars <- c('move.willing', 'unempl', 'sex.r', 'age.r', 'educ',
  'married', 'hhsize', 'nchildren', 'take.risk', 'comm.resp', 'comm.parents',
  'flang')        

# TABLE 2: Homeownership structure by country {{{1

# Estimate the results {{{2:
summ.countries <- with(lits,
  sapply(levels(co.grp), function(x)
    {
      tbl <- table(country[co.grp == x, drop = T], own.type[co.grp == x])
      round(cbind(prop.table(tbl, 1), as.integer(margin.table(tbl, 1))), 2)
    }))


# Print results for TABLE 2 {{{2:

# CEE countries: 
summ.countries['CEE']

# Western Europe: 
summ.countries['WEurope']

# Balkans (not in the paper): 
summ.countries['Balkans']

# Former Soviet Union (not in the paper): 
summ.countries['FSU']

# TABLE 3: Summary statistics individuals {{{1

# Estimate the results {{{2:
summ.indiv <- sapply(levels(lits$co.grp), simplify = FALSE, USE.NAMES = TRUE,
  function(gr) {
    lits.desc <- subset(lits, co.grp == gr)
    mns <- t(sapply(lits.desc[main.vars], function(x) {
      x.renter             = x[lits.desc$renter]
      x.privatized         = x[lits.desc$privatized]
      x.howner.nonpriv     = x[lits.desc$howner.nonpriv]
      mean.renter          = mean(x.renter, na.rm = TRUE)
      mean.privatized      = mean(x.privatized, na.rm = TRUE)
      mean.full            = mean(x, na.rm = TRUE)
      mean.howner.nonpriv  = mean(x.howner.nonpriv, na.rm = TRUE)
      test.priv.rent       = t.test(x.privatized, x.renter)$p.value
      test.nonpriv.rent    = t.test(x.howner.nonpriv, x.renter)$p.value
      format(round(c(mean.full, mean.renter, mean.privatized,
             mean.howner.nonpriv, test.priv.rent, test.nonpriv.rent), 2),
             nsmall = 2)
                              }))
    mns[, 5:6] <- ifelse(mns[, 5:6] < 0.01, '<0.01',  mns[, 5:6])
    mns <- gsub('NaN', '-', mns)
    colnames(mns) <- c('Full sample', 'Renters', 'Privatizers', 
      'Howners non-Privatizers', 'P. v R. (p-val.)', 'NonP. v R.')
    rbind(mns, Observations = c(nrow(lits.desc), sum(lits.desc$renter),
                                       sum(lits.desc$privatized),
                                       sum(lits.desc$howner.nonpriv), NA, NA))
})

# Print results for TABLE 3 {{{2:

# CEE countries: 
summ.indiv['CEE']

# Western Europe: 
summ.indiv['WEurope']

# Balkans (not in the paper): 
summ.indiv['Balkans']

# Former Soviet Union (not in the paper): 
summ.indiv['FSU']

# TABLES 4, 5, A2, A3 {{{1

# Functions for reporting regressions {{{2

# Get robust standard errors:
get.se <- function() {
  lapply(models, function(est) {
    # sqrt(diag(vcovHC(est, type = 'HC1')))
    sqrt(diag(cluster.vcov(est, est$model$Region1)))
  })
}

# Standard regression table:
reg.table.show <- function() {
  stargazer(models, se = get.se(),
            type = 'text',
            keep.stat = c('n', 'adj.rsq'),
            # keep = 'howner1',
            omit = c('Region1'),
            digits = 3,
            no.space = TRUE,
            model.names = FALSE,
            dep.var.labels.include = TRUE)
}

# Regression table for robustness check:
reg.table.stub <- function() {
  regs <- stargazer(models, se = get.se(),
                    type = 'text',
                    keep = 'howner',
                    digits = 3,
                    single.row = TRUE,
                    star.char = c('+', '*'),
                    star.cutoffs = c(.05, .01),
                    keep.stat = c('n', 'adj.rsq'),
                    align = TRUE,
                    table.layout = 't',
                    no.space = TRUE,
                    float = FALSE,
                    header = FALSE)
  assign('regs', regs, 1)
}

# Define baseline regression specifications {{{2

lm1 <- lm(move.willing ~ Region1 + howner + age.r + I(age.r^2 / 100), 
          data = lits, 
          weights = weight.federalist,
          subset = co.grp == 'CEE') 
lm2 <- update(lm1, . ~ . + take.risk + 
                          educ + 
                          sex.r * (married + nchildren + nadults) +
                          flang +
                          comm.resp +
                           comm.parents 
                           ) 
lm3 <- update(lm1, unempl ~ .) 
lm4 <- update(lm2, unempl ~ .) 

# Set up loops and estimate main regressions for tables 4, 5, A2, A3 {{{2

grp <- c('CEE', 'WEurope', 'Balkans', 'FSU')
lm.n <- paste0('lm', 1:4)

lm.fullsamp <- function() { 
  update(get0(num), subset = co.grp == g) 
}

lm.privatizers <- function() {
  update(get(num), subset = co.grp == g & (renter | privatized), 
         weights = wgts.priv.federalist)
}

get.regs <- function(reg) {
  out <- list()
  for(g in grp) {
    est0 <- list()
    g <<- g
    for(n in lm.n) {
      num <<- n
      est0[[n]] <- reg()
    }
    out[[g]] <- est0
  }
  out
}

ests.fullsamp <- get.regs(lm.fullsamp)

ests.privatizers <- get.regs(lm.privatizers)

# Print TABLE 4: Homeownership and mobility {{{2

models <- c(
            ests.fullsamp[['CEE']][c('lm1', 'lm2')],
            ests.privatizers[['CEE']][c('lm1', 'lm2')], 
            ests.fullsamp[['WEurope']][c('lm1', 'lm2')],     
            ests.privatizers[['WEurope']][c('lm1', 'lm2')])
reg.table.show()

# Print TABLE 5: Homeownership and unemployment {{{2

models <- c(
            ests.fullsamp[['CEE']][c('lm3', 'lm4')],
            ests.privatizers[['CEE']][c('lm3', 'lm4')], 
            ests.fullsamp[['WEurope']][c('lm3', 'lm4')],     
            ests.privatizers[['WEurope']][c('lm3', 'lm4')])
reg.table.show()

# Print TABLE A2: Homeownership and mobility: Balkans and the FSU {{{2

models <- c(
            ests.fullsamp[['Balkans']][c('lm1', 'lm2')],
            ests.privatizers[['Balkans']][c('lm1', 'lm2')], 
            ests.fullsamp[['FSU']][c('lm1', 'lm2')],     
            ests.privatizers[['FSU']][c('lm1', 'lm2')])
reg.table.show()

# Print TABLE A3: Homeownership and unemployment: Balkans and the FSU {{{2

models <- c(
            ests.fullsamp[['Balkans']][c('lm3', 'lm4')],
            ests.privatizers[['Balkans']][c('lm3', 'lm4')], 
            ests.fullsamp[['FSU']][c('lm3', 'lm4')],     
            ests.privatizers[['FSU']][c('lm3', 'lm4')])
reg.table.show()

# TABLE A1: Robustness checks: dropping countries countries {{{1

# Set the loop and estimate the models: {{{2
rob.checks.countries <- list()
with(lits, {
  for(g in grp[1:2]) {
    countries <- list()
    for(co in unique(country[co.grp == g])) {
      co <<- co
      g <<- g
      print(paste(g, co))
      models <<- lapply(ests.privatizers[[g]], function(spec) {
        try(update(spec, subset = country != co & co.grp ==g &
                   (renter | privatized)))
                      })
      reg.table.stub()
      countries[[co]] <- sub('howner1', co, regs) 
    }
    rob.checks.countries[[g]] <<- unlist(countries)
  }
})

# Print TABLE A1 {{{2

# CEE countries:
rob.checks.countries$CEE

# Western Europe:
rob.checks.countries$WEurope

# TABLE 6: Robustness checks: alternative specifications {{{1

# Set the loop and estimate the models: {{{2
alt.specs <- 
with(lits,
  sapply(levels(co.grp), simplify = FALSE, USE.NAMES = TRUE, function(gr) {
    gr <<- gr
    sapply(ests.privatizers[[gr]][c('lm1', 'lm2', 'lm3', 'lm4')], function(spec) { list(
        ltunemp =  update(spec, ltunemp ~ ., 
                          subset = co.grp == gr & (renter | privatized)),
        age40 = update(spec, subset =
                       co.grp == gr & (renter | privatized) & age.r > 40),
        appartments = update(spec, subset = co.grp == gr & (renter | privatized)
                                        & as.numeric(dwell.type) == 4),
        only.lf = update(spec, subset = co.grp == gr & (renter | privatized) 
                                                     & !inact),
        no.berlin = update(spec, subset = co.grp == gr & (renter | privatized) 
                                                     & Region1 != 'Berlin'),
        no.baltic = update(spec, subset = co.grp == gr & (renter | privatized) 
                                                     & !Baltic),
        no.weight = update(spec, subset = co.grp == gr & (renter | privatized),
                                 weights = NULL) 
        ) })
  })
)

# Put the results together: {{{2
alt.specs.tab <- 
  sapply(rownames(alt.specs$WEurope), function(nam) {
  models <<-  if(nam == 'ltunemp') {
    c(alt.specs$CEE[nam, 3:4], alt.specs$WEurope[nam, 3:4])
  } else {
    c(alt.specs$CEE[nam,    ], alt.specs$WEurope[nam,    ])
  }
  reg.table.stub()
  regs <- sub('howner1', nam, regs)
  regs
})

# Print TABLE 6: Robustness checks: Alt. specs., Privatizers and Renters {{{2
alt.specs.tab
